"""
Centralized file upload validation for security.

Provides validation for file uploads to prevent:
- Memory exhaustion attacks (file size limits)
- Malicious file uploads (structure validation)
- Resource abuse (file count limits)
- Type confusion attacks (MIME validation)
"""

import io
from typing import Optional, Tuple

import pdfplumber
from loguru import logger


class FileUploadValidator:
    """Centralized file upload validation for security."""

    # Security constants
    MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB per file
    MAX_FILES_PER_REQUEST = 200  # Maximum number of files in single request
    PDF_MAGIC_BYTES = b"%PDF"  # PDF file signature
    ALLOWED_MIME_TYPES = {"application/pdf"}

    @staticmethod
    def validate_file_size(
        content_length: Optional[int], file_content: Optional[bytes] = None
    ) -> Tuple[bool, Optional[str]]:
        """
        Validate file size to prevent memory exhaustion attacks.

        Args:
            content_length: Content-Length header value (if available)
            file_content: Actual file bytes (if already read)

        Returns:
            Tuple of (is_valid, error_message)
        """
        # Check Content-Length header first (before reading file)
        if content_length is not None:
            if content_length > FileUploadValidator.MAX_FILE_SIZE:
                size_mb = content_length / (1024 * 1024)
                max_mb = FileUploadValidator.MAX_FILE_SIZE / (1024 * 1024)
                return (
                    False,
                    f"File too large: {size_mb:.1f}MB (max: {max_mb}MB)",
                )

        # Check actual file size if content is provided
        if file_content is not None:
            actual_size = len(file_content)
            if actual_size > FileUploadValidator.MAX_FILE_SIZE:
                size_mb = actual_size / (1024 * 1024)
                max_mb = FileUploadValidator.MAX_FILE_SIZE / (1024 * 1024)
                return (
                    False,
                    f"File too large: {size_mb:.1f}MB (max: {max_mb}MB)",
                )

        return True, None

    @staticmethod
    def validate_file_count(file_count: int) -> Tuple[bool, Optional[str]]:
        """
        Validate number of files to prevent resource abuse.

        Args:
            file_count: Number of files in the request

        Returns:
            Tuple of (is_valid, error_message)
        """
        if file_count > FileUploadValidator.MAX_FILES_PER_REQUEST:
            return (
                False,
                f"Too many files: {file_count} (max: {FileUploadValidator.MAX_FILES_PER_REQUEST})",
            )

        if file_count <= 0:
            return False, "No files provided"

        return True, None

    @staticmethod
    def validate_mime_type(
        filename: str, file_content: bytes
    ) -> Tuple[bool, Optional[str]]:
        """
        Validate file MIME type and extension.

        Args:
            filename: Original filename
            file_content: File content bytes

        Returns:
            Tuple of (is_valid, error_message)
        """
        # Check file extension
        if not filename.lower().endswith(".pdf"):
            return (
                False,
                f"Invalid file type: {filename}. Only PDF files allowed",
            )

        # Check PDF magic bytes (file signature)
        if not file_content.startswith(FileUploadValidator.PDF_MAGIC_BYTES):
            return (
                False,
                f"Invalid PDF file: {filename}. File signature mismatch",
            )

        return True, None

    @staticmethod
    def validate_pdf_structure(
        filename: str, file_content: bytes
    ) -> Tuple[bool, Optional[str]]:
        """
        Validate PDF structure to detect malicious or corrupted files.

        This goes beyond just checking the magic bytes and actually attempts
        to parse the PDF structure.

        Args:
            filename: Original filename
            file_content: File content bytes

        Returns:
            Tuple of (is_valid, error_message)
        """
        try:
            # Attempt to open and parse the PDF structure
            with pdfplumber.open(io.BytesIO(file_content)) as pdf:
                # Check if PDF has pages
                if not pdf.pages or len(pdf.pages) == 0:
                    return False, f"Invalid PDF: {filename}. No pages found"

                # Try to access first page metadata to ensure it's parseable
                first_page = pdf.pages[0]
                _ = first_page.width  # Access basic metadata
                _ = first_page.height

            return True, None

        except Exception as e:
            logger.warning(
                f"PDF structure validation failed for {filename}: {e}"
            )
            return False, f"Invalid or corrupted PDF file: {filename}"

    @classmethod
    def validate_upload(
        cls,
        filename: str,
        file_content: bytes,
        content_length: Optional[int] = None,
    ) -> Tuple[bool, Optional[str]]:
        """
        Comprehensive validation for a single file upload.

        Runs all validation checks in sequence. Stops at first failure.

        Args:
            filename: Original filename
            file_content: File content bytes
            content_length: Content-Length header (if available)

        Returns:
            Tuple of (is_valid, error_message)
        """
        # 1. Validate file size
        is_valid, error = cls.validate_file_size(content_length, file_content)
        if not is_valid:
            return is_valid, error

        # 2. Validate MIME type and extension
        is_valid, error = cls.validate_mime_type(filename, file_content)
        if not is_valid:
            return is_valid, error

        # 3. Validate PDF structure (more thorough check)
        is_valid, error = cls.validate_pdf_structure(filename, file_content)
        if not is_valid:
            return is_valid, error

        return True, None
